Last updated: 2018-06-07
workflowr checks: (Click a bullet for more information) ✔ R Markdown file: up-to-date
Great! Since the R Markdown file has been committed to the Git repository, you know the exact version of the code that produced these results.
✔ Environment: empty
Great job! The global environment was empty. Objects defined in the global environment can affect the analysis in your R Markdown file in unknown ways. For reproducibility it’s best to always run the code in an empty environment.
✔ Seed:
set.seed(20180607)
The command set.seed(20180607) was run prior to running the code in the R Markdown file. Setting a seed ensures that any results that rely on randomness, e.g. subsampling or permutations, are reproducible.
✔ Session information: recorded
Great job! Recording the operating system, R version, and package versions is critical for reproducibility.
✔ Repository version: 35ad9e1
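Great! You are using Git for version control. Tracking code development and connecting the code version to the results is critical for reproducibility. The version displayed above was the version of the Git repository at the time these results were generated. Note that you need to be careful to ensure that all relevant files for the analysis have been committed to Git prior to generating the results (you can use wflow_publish or wflow_git_commit). workflowr only checks the R Markdown file, but you know if there are other scripts or data files that it depends on. Below is the status of the Git repository when the results were generated: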
Ignored files:
Ignored: .Rproj.user/
Note that any generated files, e.g. HTML, png, CSS, etc., are not included in this status report because it is ok for generated content to have uncommitted changes.
rm(list=ls())
library(plyr)
library(ggplot2)
library(RColorBrewer)
library(reshape2)
library(mvtnorm)
library(corrplot)
corrplot 0.84 loaded
library(data.table) #fread
Attaching package: 'data.table'
The following objects are masked from 'package:reshape2':
dcast, melt
library(plotly)
Attaching package: 'plotly'
The following object is masked from 'package:ggplot2':
last_plot
The following objects are masked from 'package:plyr':
arrange, mutate, rename, summarise
The following object is masked from 'package:stats':
filter
The following object is masked from 'package:graphics':
layout
library(pheatmap)
library(heatmaply)
Loading required package: viridis
Loading required package: viridisLite
======================
Welcome to heatmaply version 0.15.0
Type citation('heatmaply') for how to cite the package.
Type ?heatmaply for the main documentation.
The github page is: https://github.com/talgalili/heatmaply/
Please submit your suggestions and bug-reports at: https://github.com/talgalili/heatmaply/issues
Or contact: <tal.galili@gmail.com>
======================
color.vec <- brewer.pal(8, "Set1")
## FIQT: shrink association z-scores through their FDR-adjusted p-values.
## (The name presumably stands for "FDR Inverse Quantile Transformation";
## confirm with the author.)
##
## Steps: two-sided p-values are computed from |z|, floored at `min.p`,
## adjusted with p.adjust(method = "fdr"), and mapped back to the z scale
## with the sign of the original z-score.
##
## Args:
##   z:     numeric vector of z-scores (required). NA entries stay NA.
##   min.p: smallest p-value kept before adjustment (default 10^-300).
##          z-scores whose |z| exceeds the matching normal quantile are
##          returned unchanged.
##
## Returns: numeric vector of adjusted z-scores, same length as `z`.
FIQT <- function(z, min.p = 10^-300) {
  # |z| that corresponds to the p-value floor; beyond it the round trip
  # through p-values would truncate, so the raw z-score is kept instead.
  z.cap <- qnorm(min.p / 2, lower.tail = FALSE)

  pvals <- 2 * pnorm(abs(z), lower.tail = FALSE)
  # which() discards NA comparisons, so missing z-scores propagate as NA
  # instead of breaking the subscripted assignments.
  pvals[which(pvals < min.p)] <- min.p

  adj.pvals <- p.adjust(pvals, method = "fdr")
  mu.z <- sign(z) * qnorm(adj.pvals / 2, lower.tail = FALSE)

  beyond.cap <- which(abs(z) > z.cap)
  mu.z[beyond.cap] <- z[beyond.cap]
  mu.z
}
## Read the IMPC summary statistics (release 7.0) CSV into a plain data.frame
IMPC.summary.stats.file <- "/Users/leed1/Dropbox/KOMP/results/IMPC_Summary_Stat_Study/R_input/version_7.0/IMPC_ALL_statistical_results.csv"
IMPC.summary.stats <- fread(
  IMPC.summary.stats.file,
  header = TRUE,
  sep = ",",
  data.table = FALSE  # return a data.frame directly rather than a data.table
)
Warning in require_bit64(): Some columns are type 'integer64' but
package bit64 is not installed. Those columns will print as strange
looking floating point data. There is no need to reload the data. Simply
install.packages('bit64') to obtain the integer64 print method and print
the data again.
dim(IMPC.summary.stats)
[1] 1708110 87
head(IMPC.summary.stats)
phenotyping_center intercept_estimate procedure_id
1 WTSI 7.919382 114
2 WTSI 6.436838 114
3 WTSI 3.853207 114
4 WTSI 80.785790 70
5 WTSI 3.285023 114
6 WTSI 18.740977 114
mutant_biological_model_id rotated_residuals_test weight_effect_p_value
1 39752 5.520349e-07 8.967851e-14
2 42121 2.006127e-04 3.588903e-07
3 42121 1.059334e-04 5.557203e-07
4 39056 7.370000e-10 2.804626e-24
5 42121 1.209257e-02 3.673506e-06
6 39752 9.680586e-08 2.349812e-12
male_mutant_count pipeline_stable_key female_ko_effect_p_value
1 7 15 0.05357361
2 7 15 NA
3 7 15 NA
4 7 15 NA
5 7 15 NA
6 7 15 0.02377700
pipeline_stable_id parameter_stable_key data_type
1 MGP_001 5556 unidimensional
2 MGP_001 5556 unidimensional
3 MGP_001 5557 unidimensional
4 MGP_001 1999 unidimensional
5 MGP_001 5564 unidimensional
6 MGP_001 5553 unidimensional
parameter_stable_id interaction_significant strain_accession_id
1 MGP_BCI_004_001 TRUE MGI:2159965
2 MGP_BCI_004_001 FALSE MGI:2159965
3 MGP_BCI_005_001 FALSE MGI:2159965
4 IMPC_HWT_008_001 FALSE MGI:2159965
5 MGP_BCI_012_001 FALSE MGI:2159965
6 MGP_BCI_001_001 TRUE MGI:2159965
control_selection_method parameter_name
1 baseline_all T cell CD8+ percentage
2 baseline_all T cell CD8+ percentage
3 baseline_all NK cell percentage
4 baseline_all Heart weight
5 baseline_all Monocyte percentage
6 baseline_all T cell CD3+ percentage
allele_name
1 targeted mutation 1e, Wellcome Trust Sanger Institute
2 targeted mutation 1a, Wellcome Trust Sanger Institute
3 targeted mutation 1a, Wellcome Trust Sanger Institute
4 Herc1<em2(IMPC)Wtsi>
5 targeted mutation 1a, Wellcome Trust Sanger Institute
6 targeted mutation 1e, Wellcome Trust Sanger Institute
phenotyping_center_id weight_effect_stderr_estimate
1 3 0.010482887
2 3 0.012207922
3 3 0.008339901
4 3 0.129415440
5 3 0.029762697
6 3 0.023173510
weight_effect_parameter_estimate procedure_stable_id status
1 -0.07908653 MGP_BCI_001 Success
2 -0.06290950 MGP_BCI_001 Success
3 -0.04226051 MGP_BCI_001 Success
4 1.32989546 IMPC_HWT_001 Success
5 0.13920854 MGP_BCI_001 Success
6 -0.16420989 MGP_BCI_001 Success
sex_effect_parameter_estimate female_ko_effect_stderr_estimate
1 -0.9604245 0.5265205
2 NA NA
3 NA NA
4 13.9589694 NA
5 NA NA
6 -2.8800528 1.1522825
female_percentage_change group_2_residuals_normality_test
1 -19.65% 0.8668054
2 NA
3 NA
4 -4.28% 0.8173343
5 NA
6 -20.71% 0.5844848
marker_accession_id mp_term_name
1 MGI:97401
2 MGI:1920864 increased CD8-positive, alpha-beta T cell number
3 MGI:1920864 decreased NK cell number
4 MGI:2384589 decreased heart weight
5 MGI:1920864 decreased monocyte cell number
6 MGI:97401
group_1_residuals_normality_test genotype_effect_p_value
1 4.799868e-06 NA
2 2.396440e-04 0.5178283
3 1.763395e-05 0.9508662
4 7.370000e-10 0.3187274
5 2.706512e-02 0.9513060
6 6.641736e-07 NA
dependent_variable resource_name project_id
1 MGP_BCI_004_001 3i 1
2 MGP_BCI_004_001 3i 1
3 MGP_BCI_005_001 3i 1
4 IMPC_HWT_008_001 IMPC 1
5 MGP_BCI_012_001 3i 1
6 MGP_BCI_001_001 3i 1
procedure_name
1 Buffy coat peripheral blood leukocyte immunophenotyping
2 Buffy coat peripheral blood leukocyte immunophenotyping
3 Buffy coat peripheral blood leukocyte immunophenotyping
4 Heart Weight
5 Buffy coat peripheral blood leukocyte immunophenotyping
6 Buffy coat peripheral blood leukocyte immunophenotyping
doc_id top_level_mp_term_id allele_accession_id
1 MGP_BCI_004_001_CONT_173572 MGI:4432497
2 MGP_BCI_004_001_CONT_173558 MP:0005387,MP:0005397 MGI:4820151
3 MGP_BCI_005_001_CONT_173559 MP:0005387,MP:0005397 MGI:4820151
4 IMPC_HWT_008_001_CONT_173567 MP:0005385 NULL-B69294AE7
5 MGP_BCI_012_001_CONT_173568 MP:0005387,MP:0005397 MGI:4820151
6 MGP_BCI_001_001_CONT_173569 MGI:4432497
blups_test null_test_p_value p_value marker_symbol
1 0.0502391167 0.10880752 0.10880752 Ocm
2 0.0130636962 0.52360985 0.52360985 Rhox13
3 0.0065370951 0.95017310 0.95017310 Rhox13
4 0.0383806459 0.31780686 0.31780686 Herc1
5 0.0005666044 0.94987095 0.94987095 Rhox13
6 0.0256748340 0.05645537 0.05645537 Ocm
control_biological_model_id pipeline_name sex
1 44158 MGP Select Pipeline
2 44158 MGP Select Pipeline
3 44158 MGP Select Pipeline
4 44158 MGP Select Pipeline
5 44158 MGP Select Pipeline
6 44158 MGP Select Pipeline
interaction_effect_p_value colony_id project_name
1 0.04424032 MCRQ EUMODIC
2 NA MEEK EUMODIC
3 NA MEEK EUMODIC
4 0.23801188 DABQ EUMODIC
5 NA MEEK EUMODIC
6 0.02587987 MCRQ EUMODIC
female_ko_parameter_estimate female_mutant_count organisation_id
1 -1.017331 7 3
2 NA 0 3
3 NA 0 3
4 NA 7 3
5 NA 0 3
6 -2.608309 7 3
external_db_id female_control_count
1 23 648
2 23 0
3 23 0
4 22 1312
5 23 0
6 23 648
intermediate_mp_term_id
1
2 MP:0000717,MP:0011180,MP:0011181,MP:0000218,MP:0002619,MP:0000716,MP:0008246,MP:0000217,MP:0008247,MP:0000685,MP:0002435,MP:0005015,MP:0002396,MP:0005013,MP:0006387,MP:0008037,MP:0005010,MP:0008077,MP:0012764,MP:0012763,MP:0012762,MP:0013656
3 MP:0000717,MP:0011182,MP:0011180,MP:0002619,MP:0000716,MP:0008246,MP:0000217,MP:0008247,MP:0000221,MP:0000685,MP:0002396,MP:0005016,MP:0005068,MP:0008043,MP:0013656
4 MP:0002188,MP:0005406,MP:0004857,MP:0000266,MP:0002127
5 MP:0011182,MP:0011180,MP:0012441,MP:0008250,MP:0008251,MP:0000716,MP:0000217,MP:0008246,MP:0002620,MP:0008247,MP:0000221,MP:0008248,MP:0000685,MP:0002396,MP:0013658,MP:0013656
6
db_id male_ko_effect_p_value
1 173572 0.5413314
2 173558 NA
3 173559 NA
4 173567 NA
5 173568 NA
6 173569 0.5972174
top_level_mp_term_name
1
2 immune system phenotype,hematopoietic system phenotype
3 immune system phenotype,hematopoietic system phenotype
4 cardiovascular system phenotype
5 immune system phenotype,hematopoietic system phenotype
6
metadata_group sex_effect_stderr_estimate zygosity
1 efa275aa2db369c903a3a475a3334118 0.08469694 homozygote
2 efa275aa2db369c903a3a475a3334118 NA hemizygote
3 efa275aa2db369c903a3a475a3334118 NA hemizygote
4 9b9c86eb507368735074dbf8e37a664e 1.08084287 homozygote
5 efa275aa2db369c903a3a475a3334118 NA hemizygote
6 efa275aa2db369c903a3a475a3334118 0.18730719 homozygote
male_percentage_change sex_effect_p_value mp_term_id
1 4.81% 2.216068e-28
2 4.92% NA MP:0008078
3 -0.56% NA MP:0008045
4 -4.28% 5.891995e-37 MP:0002834
5 -0.66% NA MP:0000223
6 3.78% 7.968806e-49
male_ko_effect_stderr_estimate additional_information
1 0.4076059 NA
2 NA NA
3 NA NA
4 NA NA
5 NA NA
6 0.9010994 NA
statistical_method
1 Mixed Model framework, linear mixed-effects model, equation withWeight
2 Mixed Model framework, linear mixed-effects model, equation withWeight
3 Mixed Model framework, linear mixed-effects model, equation withWeight
4 Mixed Model framework, linear mixed-effects model, equation withWeight
5 Mixed Model framework, linear mixed-effects model, equation withWeight
6 Mixed Model framework, linear mixed-effects model, equation withWeight
_version_ intercept_estimate_stderr_estimate male_control_count
1 5.569978e-202 0.2813955 640
2 5.569978e-202 0.3919558 640
3 5.569978e-202 0.2693916 633
4 5.569978e-202 3.3907505 1334
5 5.569978e-202 0.9500200 639
6 5.569978e-202 0.6197159 641
intermediate_mp_term_name
1
2 abnormal lymphocyte cell number,abnormal hematopoietic cell number,increased hematopoietic cell number,increased leukocyte cell number,abnormal lymphocyte morphology,abnormal immune system cell morphology,abnormal leukocyte morphology,abnormal leukocyte cell number,abnormal mononuclear cell morphology,abnormal immune system morphology,abnormal effector T cell morphology,increased T cell number,abnormal hematopoietic system morphology/development,increased lymphocyte cell number,abnormal T cell number,abnormal T cell morphology,abnormal CD8-positive\\, alpha beta T cell morphology,abnormal CD8-positive\\, alpha-beta T cell number,increased alpha-beta T cell number,abnormal alpha-beta T cell number,abnormal alpha-beta T cell morphology,abnormal hematopoietic cell morphology
3 abnormal lymphocyte cell number,decreased hematopoietic cell number,abnormal hematopoietic cell number,abnormal lymphocyte morphology,abnormal immune system cell morphology,abnormal leukocyte morphology,abnormal leukocyte cell number,abnormal mononuclear cell morphology,decreased leukocyte cell number,abnormal immune system morphology,abnormal hematopoietic system morphology/development,decreased lymphocyte cell number,abnormal NK cell morphology,abnormal NK cell number,abnormal hematopoietic cell morphology
4 small heart,abnormal heart size,abnormal heart weight,abnormal heart morphology,abnormal cardiovascular system morphology
5 decreased hematopoietic cell number,abnormal hematopoietic cell number,abnormal monocyte cell number,abnormal myeloid leukocyte morphology,abnormal phagocyte morphology,abnormal immune system cell morphology,abnormal leukocyte cell number,abnormal leukocyte morphology,abnormal monocyte morphology,abnormal mononuclear cell morphology,decreased leukocyte cell number,abnormal mononuclear phagocyte morphology,abnormal immune system morphology,abnormal hematopoietic system morphology/development,abnormal myeloid cell morphology,abnormal hematopoietic cell morphology
6
strain_name classification_tag
1 C57BL/6N If phenotype is significant - can not classify effect
2 C57BL/6N If phenotype is significant it is for the one sex tested
3 C57BL/6N If phenotype is significant it is for the one sex tested
4 C57BL/6N If phenotype is significant - both sexes equally
5 C57BL/6N If phenotype is significant it is for the one sex tested
6 C57BL/6N If phenotype is significant - females only
effect_size procedure_stable_key allele_symbol resource_id
1 NA 215 Ocm<tm1e(EUCOMM)Wtsi> 23
2 NA 215 Rhox13<tm1a(KOMP)Wtsi> 23
3 NA 215 Rhox13<tm1a(KOMP)Wtsi> 23
4 NA 100 Herc1<em2(IMPC)Wtsi> 22
5 NA 215 Rhox13<tm1a(KOMP)Wtsi> 23
6 NA 215 Ocm<tm1e(EUCOMM)Wtsi> 23
group_2_genotype variance_significant pipeline_id group_1_genotype
1 MCRQ TRUE 8 +/+
2 MEEK TRUE 8 +/+
3 MEEK TRUE 8 +/+
4 DABQ TRUE 8 +/+
5 MEEK TRUE 8 +/+
6 MCRQ TRUE 8 +/+
male_ko_parameter_estimate genotype_effect_parameter_estimate categories
1 0.2490375 NA
2 NA 0.21929968
3 NA -0.01424171
4 NA -5.39357115
5 NA -0.05062311
6 0.4762738 NA
parameter_id batch_significant genotype_effect_stderr_estimate
1 3559 TRUE NA
2 3559 TRUE 0.3388865
3 3560 TRUE 0.2310174
4 2278 TRUE 5.4082778
5 3567 TRUE 0.8285954
6 3556 TRUE NA
resource_fullname
1 Immunology
2 Immunology
3 Immunology
4 International Mouse Phenotyping Consortium
5 Immunology
6 Immunology
## Prepare the KOMP working table
## Start from the full summary statistics and tag every record with a
## "<marker_symbol>_<zygosity>" key.
summ.data <- IMPC.summary.stats
summ.data[["marker_symbol_combined"]] <- paste(
  summ.data[["marker_symbol"]],
  summ.data[["zygosity"]],
  sep = "_"
)
## Exploratory checks, left disabled:
#table(summ.data$parameter_name)
#table(bnp.data$marker_symbol_combined)
#table(bnp.data$marker_symbol_combined)[table(bnp.data$marker_symbol_combined)>6]
#test <- subset(bnp.data, marker_symbol_combined=="Otub1_heterozygote")
#duplicated(test[,1:33,35:88]) ## 34
#colnames(test)[34]
dim(summ.data)
[1] 1708110 88
## Keep only the columns needed for the z-score analysis
summ.data <- summ.data[, c(
  "procedure_name",
  "parameter_name",
  "marker_symbol",
  "marker_symbol_combined",
  "genotype_effect_parameter_estimate",
  "genotype_effect_stderr_estimate",
  "genotype_effect_p_value"
)]
## Z-score: genotype effect estimate divided by its standard error
summ.data$zscore <- with(
  summ.data,
  genotype_effect_parameter_estimate / genotype_effect_stderr_estimate
)
## Phenotype label: "<procedure_name>_<parameter_name>"
summ.data$phenotype <- paste(
  summ.data$procedure_name,
  summ.data$parameter_name,
  sep = "_"
)
summary(summ.data$zscore)
Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
-37.2 -1.0 -0.1 -0.1 0.9 32.3 1226383
head(summ.data)
procedure_name
1 Buffy coat peripheral blood leukocyte immunophenotyping
2 Buffy coat peripheral blood leukocyte immunophenotyping
3 Buffy coat peripheral blood leukocyte immunophenotyping
4 Heart Weight
5 Buffy coat peripheral blood leukocyte immunophenotyping
6 Buffy coat peripheral blood leukocyte immunophenotyping
parameter_name marker_symbol marker_symbol_combined
1 T cell CD8+ percentage Ocm Ocm_homozygote
2 T cell CD8+ percentage Rhox13 Rhox13_hemizygote
3 NK cell percentage Rhox13 Rhox13_hemizygote
4 Heart weight Herc1 Herc1_homozygote
5 Monocyte percentage Rhox13 Rhox13_hemizygote
6 T cell CD3+ percentage Ocm Ocm_homozygote
genotype_effect_parameter_estimate genotype_effect_stderr_estimate
1 NA NA
2 0.21929968 0.3388865
3 -0.01424171 0.2310174
4 -5.39357115 5.4082778
5 -0.05062311 0.8285954
6 NA NA
genotype_effect_p_value zscore
1 NA NA
2 0.5178283 0.64711836
3 0.9508662 -0.06164776
4 0.3187274 -0.99728072
5 0.9513060 -0.06109510
6 NA NA
phenotype
1 Buffy coat peripheral blood leukocyte immunophenotyping_T cell CD8+ percentage
2 Buffy coat peripheral blood leukocyte immunophenotyping_T cell CD8+ percentage
3 Buffy coat peripheral blood leukocyte immunophenotyping_NK cell percentage
4 Heart Weight_Heart weight
5 Buffy coat peripheral blood leukocyte immunophenotyping_Monocyte percentage
6 Buffy coat peripheral blood leukocyte immunophenotyping_T cell CD3+ percentage
## Benjamini-Hochberg adjustment of the genotype-effect p-values; the number
## of comparisons is the count of non-missing p-values
summ.data$fdr <- p.adjust(
  summ.data$genotype_effect_p_value,
  method = "BH",
  n = sum(!is.na(summ.data$genotype_effect_p_value))
)
dim(summ.data)
[1] 1708110 10
## Keep complete records only: na.omit() drops every row that has an NA in
## any of the remaining columns (estimate, standard error, p-value, z-score, ...)
summ.data <- na.omit(summ.data)
dim(summ.data)
[1] 481727 10
## Per-phenotype summary of the association z-scores:
##   mean, sd       - average and spread of the z-scores
##   n              - number of non-missing z-scores
##   num.sig.genes  - number of records with FDR < 0.05
## plyr::summarise is named explicitly: plotly is attached after plyr and
## masks summarise() on the search path, so the bare name would silently pick
## up a different implementation.
sdf <- ddply(
  summ.data, c("procedure_name", "phenotype"), plyr::summarise,
  mean = mean(zscore, na.rm = TRUE),
  sd = sd(zscore, na.rm = TRUE),
  n = sum(!is.na(zscore)),
  #tstat=t.test(zscore)$statistic,
  #pval=t.test(zscore)$p.value,
  num.sig.genes = sum(fdr < 0.05, na.rm = TRUE)
)
sdf <- sdf[order(sdf$mean), ]

## Order the phenotype factor by mean z-score so the boxplots come out sorted
summ.data$phenotype <- factor(summ.data$phenotype, levels = sdf$phenotype)

## Boxplot of z-scores per phenotype. `title` is not a ggplot aesthetic, so it
## is no longer passed to aes(); the plot title is set with ggtitle().
p1 <- ggplot(summ.data, aes(x = phenotype, y = zscore)) +
  geom_boxplot() +
  geom_hline(yintercept = 0, color = "red") +
  ## reference lines at z = -2 and z = +2, i.e. roughly the +/-1.96 cut-off
  ## of a two-sided test at alpha = 0.05
  geom_hline(yintercept = c(-2, 2), color = "blue") +
  ggtitle("Association Z-score Non-centrality") +
  xlab("Domain & Phenotype") +
  ylab("Phenotype-Genotype Association Z-score") +
  coord_flip()
p1

| Version | Author | Date |
|---|---|---|
| 8378b5b | dleelab | 2018-06-07 |
## Number of significant genes against the mean z-score, one point per
## phenotype; phenotypes with fewer than 10 z-scores are left out
sdf.f <- subset(sdf, n >= 10)
p <- ggplot(sdf.f, aes(x = mean, y = num.sig.genes, text = phenotype)) +
  geom_point(size = 2, aes(colour = procedure_name)) +
  ## group = 1 fits a single loess curve through all points
  geom_smooth(method = "loess", size = 1, aes(group = 1)) +
  labs(y = "# of Sig Genes", x = "Z-score Mean Per Phenotype") +
  theme_bw() +
  theme(
    plot.background = element_blank(),
    panel.border = element_blank(),
    panel.background = element_blank(),
    axis.line = element_line(color = 'black'),
    axis.title.x = element_text(size = 15, vjust = 0),
    axis.title.y = element_text(size = 15, vjust = 0),
    strip.text.x = element_text(size = 8)
  )
#p
ggplotly(p)
#ggplotly(p, height=800, width=1200)
## Percentage of significant genes against the mean z-score, one point per
## phenotype; phenotypes with fewer than 10 z-scores are left out
sdf.f <- subset(sdf, n >= 10)
## The y aesthetic uses bare column names so it is evaluated inside the layer
## data; `sdf.f$...` inside aes() bypasses the plot data and breaks as soon as
## the data are filtered, faceted or replaced.
p <- ggplot(sdf.f, aes(x = mean, y = 100 * num.sig.genes / n, text = phenotype)) +
  geom_point(size = 2, aes(colour = procedure_name)) +
  ## group = 1 fits a single loess curve through all points
  geom_smooth(method = "loess", size = 1, aes(group = 1)) +
  ## the plotted value is multiplied by 100, so the axis is a percentage
  labs(y = "% Sig Genes", x = "Z-score Mean Per Phenotype") +
  theme_bw() +
  theme(
    plot.background = element_blank(),
    panel.border = element_blank(),
    panel.background = element_blank(),
    axis.line = element_line(color = 'black'),
    axis.title.x = element_text(size = 15, vjust = 0),
    axis.title.y = element_text(size = 15, vjust = 0),
    strip.text.x = element_text(size = 8)
  )
#p
ggplotly(p)
#ggplotly(p, height=800, width=1200)
## Per-knockout summary of the association z-scores (one row per
## marker_symbol / marker_symbol_combined pair):
##   mean, sd       - average and spread of the z-scores
##   n              - number of non-missing z-scores
##   num.sig.genes  - number of records with FDR < 0.05; the column name is
##                    kept because later code reads it
## plyr::summarise is named explicitly: plotly is attached after plyr and
## masks summarise() on the search path.
sdfp <- ddply(
  summ.data, c("marker_symbol", "marker_symbol_combined"), plyr::summarise,
  mean = mean(zscore, na.rm = TRUE),
  sd = sd(zscore, na.rm = TRUE),
  n = sum(!is.na(zscore)),
  #tstat=t.test(zscore)$statistic,
  #pval=t.test(zscore)$p.value,
  num.sig.genes = sum(fdr < 0.05, na.rm = TRUE)
)
sdfp <- sdfp[order(sdfp$mean), ]

## Order the knockout factor by mean z-score so later plots come out sorted
summ.data$marker_symbol_combined <- factor(
  summ.data$marker_symbol_combined,
  levels = sdfp$marker_symbol_combined
)
## Mean z-score against the number of z-scores per knockout
plot(sdfp$mean, sdfp$n)

| Version | Author | Date |
|---|---|---|
| 8378b5b | dleelab | 2018-06-07 |
## Boxplot of z-scores per knockout (marker_symbol_combined). `title` is not a
## ggplot aesthetic, so it is no longer passed to aes(); the plot title is set
## with ggtitle().
p1 <- ggplot(summ.data, aes(x = marker_symbol_combined, y = zscore)) +
  geom_boxplot() +
  geom_hline(yintercept = 0, color = "red") +
  ## reference lines at z = -2 and z = +2, i.e. roughly the +/-1.96 cut-off
  ## of a two-sided test at alpha = 0.05
  geom_hline(yintercept = c(-2, 2), color = "blue") +
  ggtitle("Association Z-score Non-centrality") +
  xlab("Knockout Gene") +
  ylab("Phenotype-Genotype Association Z-score") +
  coord_flip()
p1

| Version | Author | Date |
|---|---|---|
| 8378b5b | dleelab | 2018-06-07 |
dim(sdfp)
[1] 5294 6
## Keep knockouts that have at least 10 z-scores
sdfp.f <- subset(sdfp, n>=10)
dim(sdfp.f)
[1] 5125 6
## Percentage of significant phenotype records against the mean z-score, one
## point per knockout in sdfp.f.
## The y aesthetic uses bare column names so it is evaluated inside the layer
## data; `sdfp.f$...` inside aes() bypasses the plot data and breaks as soon
## as the data are filtered, faceted or replaced.
p <- ggplot(
  sdfp.f,
  aes(x = mean, y = 100 * num.sig.genes / n, text = marker_symbol_combined)
) +
  geom_point(size = 2) +
  #geom_point(size=2, aes(colour = procedure_name)) +
  ## group = 1 fits a single loess curve through all points
  geom_smooth(method = "loess", size = 1, aes(group = 1)) +
  ## the plotted value is multiplied by 100, so the axis is a percentage
  labs(y = "% Sig Phenotypes", x = "Z-score Mean Per Gene") +
  theme_bw() +
  theme(
    plot.background = element_blank(),
    panel.border = element_blank(),
    panel.background = element_blank(),
    axis.line = element_line(color = 'black'),
    axis.title.x = element_text(size = 15, vjust = 0),
    axis.title.y = element_text(size = 15, vjust = 0),
    strip.text.x = element_text(size = 8)
  )
#p
ggplotly(p)
#ggplotly(p, height=800, width=1200)
dim(summ.data)
[1] 481727 10
hist(sdf$n)

| Version | Author | Date |
|---|---|---|
| 8378b5b | dleelab | 2018-06-07 |
dim(sdf)
[1] 561 6
## Keep phenotypes that have at least 100 z-scores
sdf.100 <- droplevels(subset(sdf, n>=100))
## Store phenotype as a factor; its levels() are used to filter summ.zscore.df
sdf.100$phenotype <- as.factor(sdf.100$phenotype)
dim(sdf.100)
[1] 343 6
## Long-format table of z-scores: one row per record, keyed by phenotype and
## knockout (marker_symbol_combined)
summ.zscore.df <- summ.data[,c("procedure_name","phenotype","marker_symbol_combined","zscore")]
dim(summ.zscore.df)
[1] 481727 4
## Restrict to the phenotypes retained in sdf.100 and drop the factor levels
## that are no longer used
summ.zscore.df <- droplevels(
  summ.zscore.df[summ.zscore.df$phenotype %in% levels(sdf.100$phenotype), ]
)
dim(summ.zscore.df)
[1] 478158 4
## Reshape to wide format: one row per phenotype, one column per knockout
## (marker_symbol_combined), each cell holding the aggregated z-score.
## NOTE(review): fun.aggregate = sum adds up repeated gene/phenotype records
## and appears to fill gene/phenotype combinations with no record with 0
## rather than NA (the head() output that follows is all zeros). Confirm this
## is intended: the later cor(..., use="pairwise") call only skips NA cells,
## so zero-filled cells would enter the correlations as real observations.
summ.zscore.df <- dcast(data=summ.zscore.df, formula= phenotype~marker_symbol_combined, fun.aggregate = sum, value.var = "zscore")
dim(summ.zscore.df)
[1] 343 5273
head(summ.zscore.df[,1:5])
phenotype Epc1_homozygote
1 Eye Morphology_Mean right eye lens density 0
2 Eye Morphology_Mean left eye lens density 0
3 Eye Morphology_Max left eye lens density 0
4 Eye Morphology_Max right eye lens density 0
5 Eye Morphology_Min right eye lens density 0
6 Eye Morphology_Min left eye lens density 0
Tmprss6_homozygote Baz1b_homozygote Nsun2_homozygote
1 0 0 0
2 0 0 0
3 0 0 0
4 0 0 0
5 0 0 0
6 0 0 0
## Numeric phenotype x knockout matrix: drop the leading phenotype column and
## use its values as the row names
summ.zscore.mat <- as.matrix(summ.zscore.df[-1])
rownames(summ.zscore.mat) <- as.character(summ.zscore.df$phenotype)
head(summ.zscore.mat[, 1:5])
Epc1_homozygote
Eye Morphology_Mean right eye lens density 0
Eye Morphology_Mean left eye lens density 0
Eye Morphology_Max left eye lens density 0
Eye Morphology_Max right eye lens density 0
Eye Morphology_Min right eye lens density 0
Eye Morphology_Min left eye lens density 0
Tmprss6_homozygote
Eye Morphology_Mean right eye lens density 0
Eye Morphology_Mean left eye lens density 0
Eye Morphology_Max left eye lens density 0
Eye Morphology_Max right eye lens density 0
Eye Morphology_Min right eye lens density 0
Eye Morphology_Min left eye lens density 0
Baz1b_homozygote
Eye Morphology_Mean right eye lens density 0
Eye Morphology_Mean left eye lens density 0
Eye Morphology_Max left eye lens density 0
Eye Morphology_Max right eye lens density 0
Eye Morphology_Min right eye lens density 0
Eye Morphology_Min left eye lens density 0
Nsun2_homozygote
Eye Morphology_Mean right eye lens density 0
Eye Morphology_Mean left eye lens density 0
Eye Morphology_Max left eye lens density 0
Eye Morphology_Max right eye lens density 0
Eye Morphology_Min right eye lens density 0
Eye Morphology_Min left eye lens density 0
Zfp106_homozygote
Eye Morphology_Mean right eye lens density 0
Eye Morphology_Mean left eye lens density 0
Eye Morphology_Max left eye lens density 0
Eye Morphology_Max right eye lens density 0
Eye Morphology_Min right eye lens density 0
Eye Morphology_Min left eye lens density 0
dim(summ.zscore.mat)
[1] 343 5272
## Phenotype-by-phenotype correlation of z-scores across knockouts. The matrix
## is transposed because cor() correlates columns. The `use` option is spelled
## out in full instead of relying on partial matching of "pairwise".
pheno.cor.mat <- cor(t(summ.zscore.mat), use = "pairwise.complete.obs")
## Clustered static heatmap of the correlation matrix
pheatmap(pheno.cor.mat)

| Version | Author | Date |
|---|---|---|
| 8378b5b | dleelab | 2018-06-07 |
## Interactive heatmap of the phenotype correlation matrix (plotly)
plot_ly(z=pheno.cor.mat, type="heatmap")
## heatmaply is given the correlation matrix as a data.frame; k_row / k_col
## are both set to 10
pheno.cor.df <- as.data.frame(pheno.cor.mat)
heatmaply(pheno.cor.df, k_row=10, k_col=10, plot_method="plotly")